Given the detailed information that Baltimore City regularly releases on stuff like crime, I thought it might be possible to “prove” that the ceasefire is having an ameliorative effect on shootings in Baltimore.
#download needed packages you don't have
wants <- c("tidyverse",
"prophet", # for decent plug-and-play time series, and to make sure I'm not totally wildly off-base
"leaflet", # for maps
"geojsonio", # to get polygons for maps
"lubridate",# to process dates
"brms") # for bayesian multilevel modeling
has <- wants %in% rownames(installed.packages())
if(any(!has)) install.packages(wants[!has])
#load needed packages
sapply(wants, require, character.only = TRUE)
## tidyverse prophet leaflet geojsonio lubridate brms
## TRUE TRUE TRUE TRUE TRUE TRUE
All of this stuff is coming from https://data.baltimorecity.gov
setwd("/Users/PeterPhalen/Dropbox/manuscripts/ceasefire")
bpd <- read_csv("BPD_Part_1_Victim_Based_Crime_Data.csv")
## Parsed with column specification:
## cols(
## CrimeDate = col_character(),
## CrimeTime = col_character(),
## CrimeCode = col_character(),
## Location = col_character(),
## Description = col_character(),
## `Inside/Outside` = col_character(),
## Weapon = col_character(),
## Post = col_double(),
## District = col_character(),
## Neighborhood = col_character(),
## Longitude = col_double(),
## Latitude = col_double(),
## `Location 1` = col_character(),
## Premise = col_character(),
## crimeCaseNumber = col_logical(),
## `Total Incidents` = col_double()
## )
Ceasefires have been called four times per year in Baltimore since August 2017. These are sometimes called “ceasefire weekends”, but their impact often extends well beyond a few days (one lasted twelve). References for the below dates include https://baltimoreceasefire.com
# first day (Friday) of ceasefire weekends
ceasefire.initial <- as.Date(c("08/04/2017",
"11/03/2017",
"02/02/2018",
"05/11/2018",
"08/03/2018",
"11/02/2018",
"02/01/2019",
"05/10/2019",
"08/02/2019"),
format="%m/%d/%Y")
# then calculate the full weekend
ceasefire.weekends <- ceasefire.initial[1]
for (i in 2:length(ceasefire.initial)){
ceasefire.weekends <- c(ceasefire.weekends,
seq(from=ceasefire.initial[i], by="day", length.out=3))
}
There are multiple crimes documented per day, and each one gets its own row. We subset the crime data down to shootings and then aggregate by day.
bpd$Description <- factor(bpd$Description)
bpd$CrimeDate <- as.Date(bpd$CrimeDate, format = "%m/%d/%Y")
bpd <- subset(bpd, Description == "SHOOTING")
# there are many crimes per day. collapse to daily counts
daily <- bpd %>% group_by(CrimeDate) %>% summarise(shootings = n())
First we’re going to use Facebook’s prophet package to check the rough effect of the ceasefire (coded as a “holiday”), accounting for linear time trends, yearly, and weekly seasonality.
ceasefires <- data_frame(
holiday = 'ceasefire',
ds = ceasefire.initial,
lower_window = 0,
upper_window = 2
)
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
x <- daily$CrimeDate
y <- daily$shootings
m1 <- prophet(data.frame(ds=x,y=y),
yearly.seasonality = T,
weekly.seasonality = T,
mcmc.samples = 500,
holidays = ceasefires,
cores=4)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
Then plot the result. We can see that there are more shootings in recent years, but ceasefires seem to be associated with fewer shootings. (Also, Thursdays are good, and summers are bad.)
future <- make_future_dataframe(m1, periods = 90)
forecast <- predict(m1, future)
plot(m1, forecast)
prophet_plot_components(m1, forecast)
The prophet package doesn’t let us fit poisson likelihoods and doesn’t let us drill down and check the various effects for stuff like statistical significance, so we’re going to fit our own Bayesian model using Stan via the brms package.
# day of the week (monday, tuesday, etc)
daily$weekday <- weekdays(daily$CrimeDate)
# day of the year (0 to 364)
daily$day.of.year <- yday(daily$CrimeDate)
# ceasefire day (1 or 0)
daily$ceasefire <- factor(ifelse(daily$CrimeDate %in% ceasefire.weekends, 1, 0),labels=c("regularDay","ceasefireWeekend"))
# julian day (for time trend)
daily$jul <- julian(daily$CrimeDate)
# z-score it
daily$julz <- scale(daily$jul)[,1]
We’re predicting shootings with a linear time trend, a spline for yearly seasonality, random effects for day of the week, and a binary indicator for the ceasefire. Poisson link function because the outcome is a count and mean is about the same as sd.
fit <- brm(shootings ~ julz +
s(day.of.year) +
weekday +
ceasefire,
data=daily,
cores=4,
family=poisson)
## Compiling the C++ model
## Start sampling
## Warning: There were 8 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
We essentially replicate the prophet model, but can see here that the effect of ceasefire is classically statistically significant. I haven’t tried making these plots pretty yet, but they convey the message.
marginal_effects(fit)
I feel like there’s something to be done with this…
bpd$Neighborhood <- factor(bpd$Neighborhood)
bpd <- subset(bpd, !is.na(Neighborhood))
count <- bpd %>%
group_by(Neighborhood) %>%
summarise(total.count=n())
count <- count[order(count$Neighborhood), ]
count <- subset(count, !is.na(Neighborhood))
nbds <- geojsonio::geojson_read("https://gis-baltimore.opendata.arcgis.com/datasets/1ca93e68f11541d4b59a63243725c4b7_0.geojson",
what = "sp")
getcount <- function(neighborhood){
nbd <- as.character(neighborhood)
nbds <- as.character(count$Neighborhood)
if(nbd %in% nbds){
count <- count[count$Neighborhood == nbd,]$total.count
return(count)
}
if(!(nbd %in% nbds)){
return(0)
}
}
nbds$count <- sapply(nbds$Name, getcount)
# just for reference
range.count <- range(nbds$count,na.rm=T)
labs <- c(0,50,100,150)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), labs)
leaflet(nbds) %>%
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=F,
popup=paste0(nbds$Name,"<br/>Shootings: ",nbds$count),
color=~pal.crime(count),
fillOpacity=.5) %>%
addLegend("bottomright",title="Shootings (2012-present)",colors=~pal.crime(labs),labels=~labs)
nbds$per1k <- nbds$count / nbds$Population * 1000
nbds$per1k <- round(nbds$per1k)
nbds$per1k <- ifelse(nbds$Population == 0, NA, nbds$per1k)
(range.count <- range(nbds$per1k,na.rm=T))
## [1] 0 38
labs <- c(0,10,20,30,40)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), labs)
nbds$countlabel <- ifelse(nbds$Population == 0, paste0(nbds$Name,":<br/>","No residents"), paste0(nbds$Name,"<br/>",nbds$count," shootings among ",nbds$Population," residents"))
leaflet(nbds) %>%
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=T,
weight=1,
popup=nbds$countlabel,
color=~pal.crime(per1k),
fillOpacity=.6) %>%
addLegend("bottomright",title="Shootings per one</br>thousand residents</br>(2012-present)",colors=~pal.crime(labs),labels=~labs)